{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import stats_can\n",
    "\n",
    "# CHANGE\n",
    "path_output = '' # curated data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# JVWS data by NAICS is only available at national level (data starts in Apr 2015 and has 2 months lag)\n",
    "# UNADJUSTED FOR SEASONALITY\n",
    "data_description = {\n",
    "    #Monthly\n",
    "    \"agriculture_forestry\": 'v1212389406', # Agriculture, forestry, fishing and hunting\n",
    "    \"mining\": 'v1212389409',               # Mining, quarrying, and oil and gas extraction\n",
    "    \"utilities\": 'v1212389412', \n",
    "    \"construction\": 'v1212389415', \n",
    "    \"manufacturing\": 'v1212389418', \n",
    "    \"wholesale_trade\": 'v1212389421', # join wholesale with retail trade to get trade\n",
    "    \"retail_trade\": 'v1212389424',\n",
    "    \"transport\": 'v1212389427',       # Transportation and warehousing\n",
    "    \"info\": 'v1212389430',            # Information and cultural industries; join info with arts to get info equivalence\n",
    "    \"arts\": 'v1212389454',            # Arts, entertainment and recreation\n",
    "    \"finance\": 'v1212389433',         # Finance and insurance; join finance with real estate to get finance equivalence\n",
    "    \"real_estate\": 'v1212389436',     # Real estate and rental and leasing\n",
    "    \"professional_services\": 'v1212389439',  # Professional, scientific and technical services \n",
    "    \"manag_comp\": 'v1212389442',        # Management of companies and enterprises; join this with admin and support to get business\n",
    "    \"admin\": 'v1212389445',           # Administrative and support, waste management and remediation services\n",
    "    \"public_admin\": 'v1212389463',    # Public administration\n",
    "    \"education\": 'v1212389448',       # Education services\n",
    "    \"health\": 'v1212389451',          # Health care and social assistance\n",
    "    \"accommodation\": 'v1212389457',   # Accommodation and food services\n",
    "    \"other\": 'v1212389460',\n",
    "    \"total\": 'v1212389364',\n",
    "     }\n",
    "\n",
    "dfs = []\n",
    "for name, ticker in data_description.items():\n",
    "    df = stats_can.sc.vectors_to_df(ticker, periods='150') \n",
    "    df.columns = [name]\n",
    "    dfs.append(df)\n",
    "\n",
    "data = pd.concat(dfs, axis=1).reset_index().rename(columns={'refPer': 'date'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# FOR RESULTS IN THE PAPER: trim the data in Aug. 2024\n",
    "data = data[data['date']<'2024-09-01']\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#categories mapped to 2 digit NAICS code\n",
    "\n",
    "data.rename(columns = {'agriculture_forestry': 'jvws_11',\n",
    "                                      'mining': 'jvws_21',\n",
    "                                      'utilities': 'jvws_22',\n",
    "                                      'construction': 'jvws_23', \n",
    "                                      'manufacturing': 'jvws_31', # Safegraph has each category separately\n",
    "                                      'wholesale_trade': 'jvws_41', # Safegraph has it as 42\n",
    "                                      'retail_trade': 'jvws_44', # Safegraph has 44 and 45 separately \n",
    "                                      'transport': 'jvws_48', # Safegraph has 48 and 49 separately\n",
    "                                      'info': 'jvws_51', \n",
    "                                      'arts': 'jvws_71',\n",
    "                                      'finance': 'jvws_52', \n",
    "                                      'real_estate': 'jvws_53', \n",
    "                                      'professional_services': 'jvws_54',\n",
    "                                      'manag_comp': 'jvws_55', \n",
    "                                      'admin': 'jvws_56', \n",
    "                                      'public_admin': 'jvws_91', \n",
    "                                      'education': 'jvws_61',\n",
    "                                      'health': 'jvws_62', \n",
    "                                      'accommodation': 'jvws_72', \n",
    "                                      'other': 'jvws_81'\n",
    "                                       }, inplace=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save\n",
    "data.to_csv(path_output+'jvws_naics_month.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.11 ('env_indeed2')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "vscode": {
   "interpreter": {
    "hash": "26da711ab583a058e13fb43990c4acfd219f633b35c618763404a0fc5624b2b9"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
